Skip to content

Conversation

@Rajveer100
Copy link
Member

Resolves #150606

Currently ssa.copy is used mostly for straight line code, i.e, without joins or uses of phi nodes. With this, passes would be able to pick up the relevant info and further optimize the IR.

@llvmbot
Copy link
Member

llvmbot commented Jul 29, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Rajveer Singh Bharadwaj (Rajveer100)

Changes

Resolves #150606

Currently ssa.copy is used mostly for straight line code, i.e, without joins or uses of phi nodes. With this, passes would be able to pick up the relevant info and further optimize the IR.


Full diff: https://github.com/llvm/llvm-project/pull/151132.diff

2 Files Affected:

  • (modified) llvm/include/llvm/Transforms/Utils/PredicateInfo.h (+13-1)
  • (modified) llvm/lib/Transforms/Utils/PredicateInfo.cpp (+189)
diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
index c243e236901d5..dfaeec04daff1 100644
--- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -67,7 +67,7 @@ class Value;
 class IntrinsicInst;
 class raw_ostream;
 
-enum PredicateType { PT_Branch, PT_Assume, PT_Switch };
+enum PredicateType { PT_Branch, PT_Assume, PT_Switch, PT_PHI };
 
 /// Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op
 /// is the value the constraint applies to (the ssa.copy result).
@@ -171,6 +171,18 @@ class PredicateSwitch : public PredicateWithEdge {
   }
 };
 
+class PredicatePHI : public PredicateBase {
+public:
+  BasicBlock *PHIBlock;
+  SmallVector<std::pair<BasicBlock *, PredicateBase *>, 4> IncomingPredicates;
+
+  PredicatePHI(Value *Op, BasicBlock *PHIBB)
+      : PredicateBase(PT_PHI, Op, nullptr), PHIBlock(PHIBB) {}
+  static bool classof(const PredicateBase *PB) { return PB->Type == PT_PHI; }
+
+  LLVM_ABI std::optional<PredicateConstraint> getConstraint() const;
+};
+
 /// Encapsulates PredicateInfo, including all data associated with memory
 /// accesses.
 class PredicateInfo {
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index ac413c9b58152..6e5e97e97849c 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -214,6 +214,8 @@ class PredicateInfoBuilder {
   // whether it returned a valid result.
   DenseMap<Value *, unsigned int> ValueInfoNums;
 
+  DenseMap<BasicBlock *, SmallVector<Value *, 4>> PHICandidates;
+
   BumpPtrAllocator &Allocator;
 
   ValueInfo &getOrCreateValueInfo(Value *);
@@ -225,6 +227,13 @@ class PredicateInfoBuilder {
                      SmallVectorImpl<Value *> &OpsToRename);
   void processSwitch(SwitchInst *, BasicBlock *,
                      SmallVectorImpl<Value *> &OpsToRename);
+  void identifyPHIInsertionPoints(SmallVectorImpl<Value *> &OpsToRename);
+  bool needsPHIForPredicateInfo(Value *Op, BasicBlock *BB);
+  void insertPredicatePHIs(Value *Op, BasicBlock *BB);
+  void computePredicateIDF(Value *Op,
+                           const SmallPtrSet<BasicBlock *, 8> &DefiningBlocks,
+                           SmallPtrSet<BasicBlock *, 8> &PHIBlocks);
+  void processPredicatePHIs(SmallVectorImpl<Value *> &OpsToRename);
   void renameUses(SmallVectorImpl<Value *> &OpsToRename);
   void addInfoFor(SmallVectorImpl<Value *> &OpsToRename, Value *Op,
                   PredicateBase *PB);
@@ -453,6 +462,155 @@ void PredicateInfoBuilder::processSwitch(
   }
 }
 
+void PredicateInfoBuilder::identifyPHIInsertionPoints(
+    SmallVectorImpl<Value *> &OpsToRename) {
+  for (Value *Op : OpsToRename) {
+    const auto &ValueInfo = getValueInfo(Op);
+    SmallPtrSet<BasicBlock *, 8> DefiningBlocks;
+
+    for (const auto *PInfo : ValueInfo.Infos) {
+      if (auto *PBranch = dyn_cast<PredicateBranch>(PInfo)) {
+        DefiningBlocks.insert(PBranch->To);
+      } else if (auto *PSwitch = dyn_cast<PredicateSwitch>(PInfo)) {
+        DefiningBlocks.insert(PSwitch->To);
+      } else if (auto *PAssume = dyn_cast<PredicateAssume>(PInfo)) {
+        DefiningBlocks.insert(PAssume->AssumeInst->getParent());
+      }
+    }
+
+    if (DefiningBlocks.size() > 1) {
+      SmallPtrSet<BasicBlock *, 8> PHIBlocks;
+      computePredicateIDF(Op, DefiningBlocks, PHIBlocks);
+
+      for (BasicBlock *PHIBlock : PHIBlocks) {
+        if (needsPHIForPredicateInfo(Op, PHIBlock)) {
+          PHICandidates[PHIBlock].push_back(Op);
+        }
+      }
+    }
+  }
+}
+
+void PredicateInfoBuilder::computePredicateIDF(
+    Value *Op, const SmallPtrSet<BasicBlock *, 8> &DefiningBlocks,
+    SmallPtrSet<BasicBlock *, 8> &PHIBlocks) {
+
+  SmallVector<BasicBlock *, 8> Worklist(DefiningBlocks.begin(),
+                                        DefiningBlocks.end());
+
+  while (!Worklist.empty()) {
+    BasicBlock *BB = Worklist.pop_back_val();
+
+    DomTreeNode *Node = DT.getNode(BB);
+    if (!Node)
+      continue;
+
+    for (BasicBlock *Succ : successors(BB)) {
+      if (!DT.dominates(BB, Succ)) {
+        BasicBlock *IDom = DT.getNode(BB)->getIDom()->getBlock();
+        if (DT.dominates(IDom, Succ)) {
+          if (PHIBlocks.insert(Succ).second) {
+            bool HasOpUse = false;
+            for (auto &I : *Succ) {
+              for (Use &U : I.uses()) {
+                if (U.get() == Op) {
+                  HasOpUse = true;
+                  break;
+                }
+              }
+            }
+            if (HasOpUse) {
+              Worklist.push_back(Succ);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+bool PredicateInfoBuilder::needsPHIForPredicateInfo(Value *Op, BasicBlock *BB) {
+  if (BB->getSinglePredecessor())
+    return false;
+
+  const auto &ValueInfo = getValueInfo(Op);
+  SmallDenseSet<PredicateBase *, 4> PredPredicates;
+
+  for (BasicBlock *Pred : predecessors(BB)) {
+    PredicateBase *PredInfo = nullptr;
+
+    for (const auto *PInfo : ValueInfo.Infos) {
+      if (auto *PBranch = dyn_cast<PredicateBranch>(PInfo)) {
+        if (PBranch->From == Pred && PBranch->To == BB) {
+          PredInfo = const_cast<PredicateBase *>(PInfo);
+          break;
+        }
+      } else if (auto *PSwitch = dyn_cast<PredicateSwitch>(PInfo)) {
+        if (PSwitch->From == Pred && PSwitch->To == BB) {
+          PredInfo = const_cast<PredicateBase *>(PInfo);
+          break;
+        }
+      }
+    }
+
+    if (PredInfo) {
+      PredPredicates.insert(PredInfo);
+    }
+  }
+
+  return PredPredicates.size() > 1 ||
+         (PredPredicates.size() == 1 && pred_size(BB) > 1);
+}
+
+void PredicateInfoBuilder::insertPredicatePHIs(Value *Op, BasicBlock *BB) {
+  IRBuilder<> Builder(&BB->front());
+  PHINode *PHI = Builder.CreatePHI(Op->getType(), pred_size(BB),
+                                   Op->getName() + ".predicate.phi");
+
+  auto *PPhi = new (Allocator) PredicatePHI(Op, BB);
+  PPhi->RenamedOp = PHI;
+
+  const auto &ValueInfo = getValueInfo(Op);
+  for (BasicBlock *Pred : predecessors(BB)) {
+    Value *IncomingValue = Op;
+
+    for (const auto *PInfo : ValueInfo.Infos) {
+      if (auto *PBranch = dyn_cast<PredicateBranch>(PInfo)) {
+        if (PBranch->From == Pred && PBranch->To == BB) {
+          PPhi->IncomingPredicates.push_back(
+              {Pred, const_cast<PredicateBase *>(PInfo)});
+          break;
+        }
+      } else if (auto *PSwitch = dyn_cast<PredicateSwitch>(PInfo)) {
+        if (PSwitch->From == Pred && PSwitch->To == BB) {
+          PPhi->IncomingPredicates.push_back(
+              {Pred, const_cast<PredicateBase *>(PInfo)});
+          break;
+        }
+      }
+    }
+
+    PHI->addIncoming(IncomingValue, Pred);
+  }
+
+  PI.PredicateMap.insert({PHI, PPhi});
+
+  auto &OperandInfo = getOrCreateValueInfo(Op);
+  OperandInfo.Infos.push_back(PPhi);
+}
+
+void PredicateInfoBuilder::processPredicatePHIs(
+    SmallVectorImpl<Value *> &OpsToRename) {
+
+  for (const auto &Entry : PHICandidates) {
+    BasicBlock *PHIBlock = Entry.first;
+
+    for (Value *Op : Entry.second) {
+      insertPredicatePHIs(Op, PHIBlock);
+    }
+  }
+}
+
 // Build predicate info for our function
 void PredicateInfoBuilder::buildPredicateInfo() {
   DT.updateDFSNumbers();
@@ -479,6 +637,10 @@ void PredicateInfoBuilder::buildPredicateInfo() {
       if (DT.isReachableFromEntry(II->getParent()))
         processAssume(II, II->getParent(), OpsToRename);
   }
+
+  identifyPHIInsertionPoints(OpsToRename);
+  processPredicatePHIs(OpsToRename);
+
   // Now rename all our operations.
   renameUses(OpsToRename);
 }
@@ -774,10 +936,33 @@ std::optional<PredicateConstraint> PredicateBase::getConstraint() const {
     }
 
     return {{CmpInst::ICMP_EQ, cast<PredicateSwitch>(this)->CaseValue}};
+  case PT_PHI:
+    return cast<PredicatePHI>(this)->getConstraint();
   }
   llvm_unreachable("Unknown predicate type");
 }
 
+std::optional<PredicateConstraint> PredicatePHI::getConstraint() const {
+  // For PHI predicates, find the common constraint across all incoming edges
+  if (IncomingPredicates.empty())
+    return std::nullopt;
+
+  auto FirstConstraint = IncomingPredicates[0].second->getConstraint();
+  if (!FirstConstraint)
+    return std::nullopt;
+
+  // Verify all incoming predicates have the same constraint
+  for (size_t I = 1; I < IncomingPredicates.size(); ++I) {
+    auto Constraint = IncomingPredicates[I].second->getConstraint();
+    if (!Constraint || Constraint->Predicate != FirstConstraint->Predicate ||
+        Constraint->OtherOp != FirstConstraint->OtherOp) {
+      return std::nullopt;
+    }
+  }
+
+  return FirstConstraint;
+}
+
 void PredicateInfo::verifyPredicateInfo() const {}
 
 // Replace ssa_copy calls created by PredicateInfo with their operand.
@@ -839,6 +1024,10 @@ class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter {
       } else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) {
         OS << "; assume predicate info {"
            << " Comparison:" << *PA->Condition;
+      } else if (const auto *PP = dyn_cast<PredicatePHI>(PI)) {
+        OS << "; phi predicate info { PHIBlock: ";
+        PP->PHIBlock->printAsOperand(OS);
+        OS << " IncomingEdges: " << PP->IncomingPredicates.size();
       }
       OS << ", RenamedOp: ";
       PI->RenamedOp->printAsOperand(OS, false);

@Rajveer100 Rajveer100 requested a review from nikic July 29, 2025 12:06
@Rajveer100
Copy link
Member Author

A little unsure about the approach, here's some debug info:

Details

build/bin/opt -passes="print-predicateinfo,gvn,newgvn" -debug -S opt-const-store.ll -o opt-const-store-O3.ll
Args: build/bin/opt -passes=print-predicateinfo,gvn,newgvn -debug -S opt-const-store.ll -o opt-const-store-O3.ll 
PredicateInfo for function: h5diff
Visiting   %cond = icmp eq i32 %0, 0
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Found replacement   %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond) for   %cond = icmp eq i32 %0, 0 in   %cond.predicate.phi = phi i1 [ %cond, %5 ], [ %cond, %4 ], [ %cond, %3 ]
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Found replacement   %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond) for   %cond = icmp eq i32 %0, 0 in   %cond.predicate.phi = phi i1 [ %cond, %5 ], [ %cond, %4 ], [ %cond.0, %3 ]
Visiting i32 %0
Rename Stack is empty
Current DFS numbers are (0,9)
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Found replacement   %.0 = call i32 @llvm.ssa.copy.i32(i32 %0) for i32 %0 in   %.predicate.phi = phi i32 [ %0, %5 ], [ %0, %4 ], [ %0, %3 ]
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Found replacement   %.1 = call i32 @llvm.ssa.copy.i32(i32 %0) for i32 %0 in   %.predicate.phi = phi i32 [ %0, %5 ], [ %0, %4 ], [ %.0, %3 ]
define noundef i32 @h5diff(i32 %0, i1 %1) local_unnamed_addr {
  %cond = icmp eq i32 %0, 0
  br i1 %1, label %3, label %4

3:                                                ; preds = %2
  store i32 1, ptr @p, align 4
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison:  %cond = icmp eq i32 %0, 0 Edge: [label %3,label %common.ret], RenamedOp: %cond }
  %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond)
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison:  %cond = icmp eq i32 %0, 0 Edge: [label %3,label %common.ret], RenamedOp: %0 }
  %.0 = call i32 @llvm.ssa.copy.i32(i32 %0)
  br i1 %cond, label %5, label %common.ret

common.ret:                                       ; preds = %5, %4, %3
; Has predicate info
; phi predicate info { PHIBlock: label %common.ret IncomingEdges: 2, RenamedOp: %.predicate.phi }
  %.predicate.phi = phi i32 [ %0, %5 ], [ %.1, %4 ], [ %.0, %3 ]
; Has predicate info
; phi predicate info { PHIBlock: label %common.ret IncomingEdges: 2, RenamedOp: %cond.predicate.phi }
  %cond.predicate.phi = phi i1 [ %cond, %5 ], [ %cond.1, %4 ], [ %cond.0, %3 ]
  ret i32 0

4:                                                ; preds = %2
  store i32 2, ptr @p, align 4
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison:  %cond = icmp eq i32 %0, 0 Edge: [label %4,label %common.ret], RenamedOp: %cond }
  %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond)
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison:  %cond = icmp eq i32 %0, 0 Edge: [label %4,label %common.ret], RenamedOp: %0 }
  %.1 = call i32 @llvm.ssa.copy.i32(i32 %0)
  br i1 %cond, label %5, label %common.ret

5:                                                ; preds = %4, %3
  store i32 %0, ptr @p, align 4
  br label %common.ret
}
GVN iteration: 0
Replace dominated use of 'i1 %cond' with i1 false in   %cond.predicate.phi = phi i1 [ %cond, %5 ], [ %cond, %4 ], [ %cond, %3 ]
Replace dominated use of 'i1 %cond' with i1 false in   %cond.predicate.phi = phi i1 [ %cond, %5 ], [ false, %4 ], [ %cond, %3 ]
GVN iteration: 1
Visiting   %cond = icmp eq i32 %0, 0
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Found replacement   %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond) for   %cond = icmp eq i32 %0, 0 in   %cond.predicate.phi1 = phi i1 [ %cond, %5 ], [ %cond, %4 ], [ %cond, %3 ]
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Found replacement   %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond) for   %cond = icmp eq i32 %0, 0 in   %cond.predicate.phi1 = phi i1 [ %cond, %5 ], [ %cond, %4 ], [ %cond.0, %3 ]
Visiting i32 %0
Rename Stack is empty
Current DFS numbers are (0,9)
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Found replacement   %.0 = call i32 @llvm.ssa.copy.i32(i32 %0) for i32 %0 in   %.predicate.phi = phi i32 [ %0, %5 ], [ %0, %4 ], [ %0, %3 ]
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Found replacement   %.1 = call i32 @llvm.ssa.copy.i32(i32 %0) for i32 %0 in   %.predicate.phi = phi i32 [ %0, %5 ], [ %0, %4 ], [ %.0, %3 ]
Skipping trivially dead instruction   %.predicate.phi = phi i32 [ %0, %5 ], [ %.1, %4 ], [ %.0, %3 ]
Marking   %.predicate.phi = phi i32 [ %0, %5 ], [ %.1, %4 ], [ %.0, %3 ] for deletion
Skipping trivially dead instruction   %cond.predicate.phi1 = phi i1 [ %cond, %5 ], [ %cond.1, %4 ], [ %cond.0, %3 ]
Marking   %cond.predicate.phi1 = phi i1 [ %cond, %5 ], [ %cond.1, %4 ], [ %cond.0, %3 ] for deletion
Skipping trivially dead instruction   %cond.predicate.phi = phi i1 [ %cond, %5 ], [ false, %4 ], [ false, %3 ]
Marking   %cond.predicate.phi = phi i1 [ %cond, %5 ], [ false, %4 ], [ false, %3 ] for deletion
Block %2 marked reachable
Processing instruction   %cond = icmp eq i32 %0, 0
Created new congruence class for   %cond = icmp eq i32 %0, 0 using expression { ExpressionTypeBasic, opcode = 13600, operands = {[0] = i32 0  [1] = i32 %0  } } at 4 and leader   %cond = icmp eq i32 %0, 0
New class 4 for expression { ExpressionTypeBasic, opcode = 13600, operands = {[0] = i32 0  [1] = i32 %0  } }
Processing instruction   br i1 %1, label %3, label %4
Block %3 marked reachable
Block %4 marked reachable
Processing instruction   store i32 2, ptr @p, align 4
Created new congruence class for   store i32 2, ptr @p, align 4 using expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p  }  represents Store    store i32 2, ptr @p, align 4 with StoredValue i32 2 and MemoryLeader 2 = MemoryDef(liveOnEntry)} at 5 and leader   store i32 2, ptr @p, align 4 and stored value i32 2
New class 5 for expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p  }  represents Store    store i32 2, ptr @p, align 4 with StoredValue i32 2 and MemoryLeader 2 = MemoryDef(liveOnEntry)}
Memory class leader change for class 5 due to new memory instruction becoming leader
Setting 2 = MemoryDef(liveOnEntry) equivalent to congruence class 5 with current MemoryAccess leader 2 = MemoryDef(liveOnEntry)
Processing instruction   %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Found predicate info from instruction !
Created new congruence class for   %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond) using expression { ExpressionTypeConstant, opcode = 5,  constant = i1 false} at 6 and leader i1 false
New class 6 for expression { ExpressionTypeConstant, opcode = 5,  constant = i1 false}
Processing instruction   %.1 = call i32 @llvm.ssa.copy.i32(i32 %0)
Found predicate info from instruction !
New class 2 for expression { ExpressionTypeVariable, opcode = 22,  variable = i32 %0}
Processing instruction   br i1 %cond, label %5, label %common.ret
Block %5 marked reachable
Block common.ret marked reachable
Processing instruction   store i32 1, ptr @p, align 4
Created new congruence class for   store i32 1, ptr @p, align 4 using expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p  }  represents Store    store i32 1, ptr @p, align 4 with StoredValue i32 1 and MemoryLeader 1 = MemoryDef(liveOnEntry)} at 7 and leader   store i32 1, ptr @p, align 4 and stored value i32 1
New class 7 for expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p  }  represents Store    store i32 1, ptr @p, align 4 with StoredValue i32 1 and MemoryLeader 1 = MemoryDef(liveOnEntry)}
Memory class leader change for class 7 due to new memory instruction becoming leader
Setting 1 = MemoryDef(liveOnEntry) equivalent to congruence class 7 with current MemoryAccess leader 1 = MemoryDef(liveOnEntry)
Processing instruction   %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Found predicate info from instruction !
New class 6 for expression { ExpressionTypeConstant, opcode = 5,  constant = i1 false}
Processing instruction   %.0 = call i32 @llvm.ssa.copy.i32(i32 %0)
Found predicate info from instruction !
New class 2 for expression { ExpressionTypeVariable, opcode = 22,  variable = i32 %0}
Processing instruction   br i1 %cond, label %5, label %common.ret
Block %5 was reachable, but new edge {%3,%5} to it found
Block common.ret was reachable, but new edge {%3,common.ret} to it found
Processing MemoryPhi 4 = MemoryPhi({%3,1},{%4,2})
Memory Phi value numbered to itself
Setting 4 = MemoryPhi({%3,1},{%4,2}) equivalent to congruence class 8 with current MemoryAccess leader 4 = MemoryPhi({%3,1},{%4,2})
Processing instruction   store i32 %0, ptr @p, align 4
Created new congruence class for   store i32 %0, ptr @p, align 4 using expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p  }  represents Store    store i32 %0, ptr @p, align 4 with StoredValue i32 %0 and MemoryLeader 3 = MemoryDef(4)} at 9 and leader   store i32 %0, ptr @p, align 4 and stored value i32 %0
New class 9 for expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p  }  represents Store    store i32 %0, ptr @p, align 4 with StoredValue i32 %0 and MemoryLeader 3 = MemoryDef(4)}
Memory class leader change for class 9 due to new memory instruction becoming leader
Setting 3 = MemoryDef(4) equivalent to congruence class 9 with current MemoryAccess leader 3 = MemoryDef(4)
Processing instruction   br label %common.ret
Block common.ret was reachable, but new edge {%5,common.ret} to it found
Processing MemoryPhi 5 = MemoryPhi({%3,1},{%5,3},{%4,2})
Memory Phi value numbered to itself
Setting 5 = MemoryPhi({%3,1},{%5,3},{%4,2}) equivalent to congruence class 10 with current MemoryAccess leader 5 = MemoryPhi({%3,1},{%5,3},{%4,2})
Processing instruction   ret i32 0
Beginning iteration verification
Processing instruction   %cond = icmp eq i32 %0, 0
Processing instruction   br i1 %1, label %3, label %4
Processing instruction   store i32 2, ptr @p, align 4
Processing instruction   %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Found predicate info from instruction !
Processing instruction   %.1 = call i32 @llvm.ssa.copy.i32(i32 %0)
Found predicate info from instruction !
Processing instruction   br i1 %cond, label %5, label %common.ret
Processing instruction   store i32 1, ptr @p, align 4
Processing instruction   %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Found predicate info from instruction !
Processing instruction   %.0 = call i32 @llvm.ssa.copy.i32(i32 %0)
Found predicate info from instruction !
Processing instruction   br i1 %cond, label %5, label %common.ret
Processing MemoryPhi 4 = MemoryPhi({%3,1},{%4,2})
Memory Phi value numbered to itself
Setting 4 = MemoryPhi({%3,1},{%4,2}) equivalent to congruence class 8 with current MemoryAccess leader 4 = MemoryPhi({%3,1},{%4,2})
Processing instruction   store i32 %0, ptr @p, align 4
Processing instruction   br label %common.ret
Processing MemoryPhi 5 = MemoryPhi({%3,1},{%5,3},{%4,2})
Memory Phi value numbered to itself
Setting 5 = MemoryPhi({%3,1},{%5,3},{%4,2}) equivalent to congruence class 10 with current MemoryAccess leader 5 = MemoryPhi({%3,1},{%5,3},{%4,2})
Processing instruction   ret i32 0
Eliminating in congruence class 10
Eliminating in congruence class 9
Eliminating in congruence class 8
Eliminating in congruence class 7
Eliminating in congruence class 6
Found replacement i1 false for   %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Replacing   %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond) with i1 false
Marking   %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond) for deletion
Found replacement i1 false for   %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Replacing   %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond) with i1 false
Marking   %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond) for deletion
Eliminating in congruence class 5
Eliminating in congruence class 4
Eliminating in congruence class 3
Eliminating in congruence class 2
Found replacement i32 %0 for   %.1 = call i32 @llvm.ssa.copy.i32(i32 %0)
Replacing   %.1 = call i32 @llvm.ssa.copy.i32(i32 %0) with i32 %0
Marking   %.1 = call i32 @llvm.ssa.copy.i32(i32 %0) for deletion
Found replacement i32 %0 for   %.0 = call i32 @llvm.ssa.copy.i32(i32 %0)
Replacing   %.0 = call i32 @llvm.ssa.copy.i32(i32 %0) with i32 %0
Marking   %.0 = call i32 @llvm.ssa.copy.i32(i32 %0) for deletion
Eliminating in congruence class 1
Eliminating in congruence class 0
Congruence class 0 has 3 members
Congruence class 1 has 0 members
Congruence class 2 has 1 members
Congruence class 3 has 1 members
Congruence class 4 has 1 members
Congruence class 5 has 1 members
Congruence class 6 has 0 members
Congruence class 7 has 1 members
Congruence class 8 has 0 members
Congruence class 9 has 1 members
Congruence class 10 has 0 members

The predicate information looks fine to me, but I think I am missing something that's not causing the store to change.

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd expect you to place an actual phi node (which combines one predicated input and other non-predicated input), not a new type of predicate.

@Rajveer100
Copy link
Member Author

I do insert them if you look at the debug info.

@nikic
Copy link
Contributor

nikic commented Jul 29, 2025

Can you please add a test that shows the generated IR? It's hard to comment in abstract.

@Rajveer100
Copy link
Member Author

Rajveer100 commented Jul 29, 2025

Below would be clearer:

build/bin/opt -passes="print-predicateinfo" -S opt-const-store.ll -o opt-const-store-O3.ll
PredicateInfo for function: h5diff
define noundef i32 @h5diff(i32 %0, i1 %1) local_unnamed_addr {
  %cond = icmp eq i32 %0, 0
  br i1 %1, label %3, label %4

3:                                                ; preds = %2
  store i32 1, ptr @p, align 4
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison:  %cond = icmp eq i32 %0, 0 Edge: [label %3,label %common.ret], RenamedOp: %cond }
  %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond)
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison:  %cond = icmp eq i32 %0, 0 Edge: [label %3,label %common.ret], RenamedOp: %0 }
  %.0 = call i32 @llvm.ssa.copy.i32(i32 %0)
  br i1 %cond, label %5, label %common.ret

common.ret:                                       ; preds = %5, %4, %3
; Has predicate info
; phi predicate info { PHIBlock: label %common.ret IncomingEdges: 2, RenamedOp: %.predicate.phi }
  %.predicate.phi = phi i32 [ %0, %5 ], [ %.1, %4 ], [ %.0, %3 ]
; Has predicate info
; phi predicate info { PHIBlock: label %common.ret IncomingEdges: 2, RenamedOp: %cond.predicate.phi }
  %cond.predicate.phi = phi i1 [ %cond, %5 ], [ %cond.1, %4 ], [ %cond.0, %3 ]
  ret i32 0

4:                                                ; preds = %2
  store i32 2, ptr @p, align 4
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison:  %cond = icmp eq i32 %0, 0 Edge: [label %4,label %common.ret], RenamedOp: %cond }
  %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond)
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison:  %cond = icmp eq i32 %0, 0 Edge: [label %4,label %common.ret], RenamedOp: %0 }
  %.1 = call i32 @llvm.ssa.copy.i32(i32 %0)
  br i1 %cond, label %5, label %common.ret

5:                                                ; preds = %4, %3
  store i32 %0, ptr @p, align 4
  br label %common.ret
}

@nikic
Copy link
Contributor

nikic commented Jul 29, 2025

It looks like the store in 5: still uses %0, without a phi. We only have the phi in common.ret, where the value is not actually used...

@Rajveer100 Rajveer100 force-pushed the opt-const-store branch 3 times, most recently from b58b62c to fa9177b Compare July 30, 2025 13:21
@Rajveer100
Copy link
Member Author

After debugging under lldb and looking further, I understood the issue.

Re-implementing it :)

@Rajveer100
Copy link
Member Author

Rajveer100 commented Aug 1, 2025

I was just wondering if we could simply add a fold or case based optimization like what instcombine does by recognizing patterns rather than making changes in predicate info?

From your point of view, do you think adding phi nodes covers many cases potentially apart from generalizing and using existing code?

Copy link
Member Author

@Rajveer100 Rajveer100 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I managed to get the predicate information to work correctly and achieve the final output as well:

store i32 0, ptr @p, align 4

More info below.

@Rajveer100 Rajveer100 requested a review from nikic August 3, 2025 12:11
@Rajveer100
Copy link
Member Author

Rajveer100 commented Aug 9, 2025

@nikic
Could you review this, I have added tests, will address existing test failures if the approach seems reasonable?

@Rajveer100
Copy link
Member Author

@nikic
Gentle ping!

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs a rebase as use of ssa.copy has been removed.

The last time I tried this patch it failed to bootstrap clang: https://llvm-compile-time-tracker.com/show_error.php?commit=9257de61b46d07932842d91ee25304007f1cdf66 But I see you made more changes since, maybe it's fixed.

@Rajveer100
Copy link
Member Author

This needs a rebase as use of ssa.copy has been removed.

The last time I tried this patch it failed to bootstrap clang: https://llvm-compile-time-tracker.com/show_error.php?commit=9257de61b46d07932842d91ee25304007f1cdf66 But I see you made more changes since, maybe it's fixed.

Interesting, I believe you are referring to:

#151174

@Rajveer100
Copy link
Member Author

I had some stale build files and weird behaviors for tests which showed as passed on my local machine and fail alternatively when running. Cleaning them and re-checking.

@Rajveer100
Copy link
Member Author

Linux tests have passed, looking into the Windows CI, seems to be just a labelling issue.

@Rajveer100
Copy link
Member Author

Rajveer100 commented Aug 14, 2025

This test file:
llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll

--- a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
+++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
@@ -68,10 +68,10 @@ define i32 @foo(ptr nocapture %var1, ptr nocapture readnone %var2, ptr nocapture
 ; CHECK-NEXT:    [[ADD8]] = add nsw i32 [[ADD86]], [[ADD]]
 ; CHECK-NEXT:    [[INC]] = add nuw i32 [[J_113]], 1
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[INC]], [[ITR]]
-; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT7:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT6:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       for.inc11.loopexit.loopexit:
 ; CHECK-NEXT:    br label [[FOR_INC11_LOOPEXIT:%.*]]
-; CHECK:       for.inc11.loopexit.loopexit7:
+; CHECK:       for.inc11.loopexit.loopexit6:
 ; CHECK-NEXT:    [[ADD8_LCSSA:%.*]] = phi i32 [ [[ADD8]], [[FOR_BODY3]] ]
 ; CHECK-NEXT:    store i32 [[ADD8_LCSSA]], ptr [[ARRAYIDX7]], align 4, !alias.scope [[META2]]
 ; CHECK-NEXT:    br label [[FOR_INC11_LOOPEXIT]]

It's unpredictable, keeps switching between 6 and 7. It has no phi predicates, so unrelated to my change.

@Rajveer100
Copy link
Member Author

All checks have passed.

@Rajveer100
Copy link
Member Author

@nikic
Any other comments apart from the predicate phis?

Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Rajveer100 could you add a test that shows an improvement for a transform?

The description mentions this resolves #150606, but it doesn't look like there's a test case that shows an improvement?

The NewGVN and LoopVersioningLICM changes seem to just be artifacts of running the test update scripts?

…PHI`

when needing introduction of phi nodes

Resolves llvm#150606

Currently `ssa.copy` is used mostly for straight line code, i.e, without
joins or uses of phi nodes. With this, passes would be able to pick up the
relevant info and further optimize the IR.
@Rajveer100
Copy link
Member Author

@Rajveer100 could you add a test that shows an improvement for a transform?

The description mentions this resolves #150606, but it doesn't look like there's a test case that shows an improvement?

The NewGVN and LoopVersioningLICM changes seem to just be artifacts of running the test update scripts?

Added.

@Rajveer100 Rajveer100 requested a review from fhahn August 15, 2025 10:37
@Rajveer100
Copy link
Member Author

@fhahn
Anything else that's needed?

@Rajveer100
Copy link
Member Author

@nikic
Ping :)

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I'm not convinced. Please remove PredicatePHI.

@Rajveer100
Copy link
Member Author

Alright, I will make the changes and we can continue review from there :)

@Rajveer100
Copy link
Member Author

@nikic
There's actually one thing I missed to share with you. Right now, with PredicatePHI this is well integrated with the renaming stack and so we don't need to handle anything extra when we place sea copies since the edge predicates would have already been inserted.

After removal:

    // For edge predicates, we can just place the operand in the block before
    // the terminator. For assume, we have to place it right after the assume
    // to ensure we dominate all uses except assume itself. Always insert
    // right before the terminator or after the assume, so that we insert in
    // proper order in the case of multiple predicateinfo in the same block.
    if (isa<PredicateWithEdge>(ValInfo)) {
      BitCastInst *PIC = CreateSSACopy(getBranchTerminator(ValInfo), Op,
                                       Op->getName() + "." + Twine(Counter++));
      PI.PredicateMap.insert({PIC, ValInfo});
      Result.Def = PIC;
    } else {
      auto *PAssume = dyn_cast<PredicateAssume>(ValInfo);
      assert(PAssume &&
             "Should not have gotten here without it being an assume");
      // Insert the predicate directly after the assume. While it also holds
      // directly before it, assume(i1 true) is not a useful fact.
      BitCastInst *PIC = CreateSSACopy(PAssume->AssumeInst->getNextNode(), Op);
      PI.PredicateMap.insert({PIC, ValInfo});
      Result.Def = PIC;
    }

    for (const auto &Entry : InsertedPHIBlocks) {
      BasicBlock *PHIBlock = Entry.first;
      Value *OpInBlock = Entry.second;
      if (DT.dominates(...)) {
        BitCastInst *PIC =
            CreateSSACopy(&*PHIBlock->getFirstInsertionPt(), OpInBlock);
        Result.Def = PIC;
      }
    }

We would need some way to determine to dominated uses for this predicate which is an extra overhead to check for. For this, we would need to do it after we completely materialize the stack and then insert them.

This would mean moving the lambda function for creating ssa copy as well extra checks.

In fact, the rename use definition itself insists on this fact:

// Instead of the standard SSA renaming algorithm, which is O(Number of
// instructions), and walks the entire dominator tree, we walk only the defs +
// uses.  The standard SSA renaming algorithm does not really rely on the
// dominator tree except to order the stack push/pops of the renaming stacks, so
// that defs end up getting pushed before hitting the correct uses.  This does
// not require the dominator tree, only the *order* of the dominator tree. The
// complete and correct ordering of the defs and uses, in dominator tree is
// contained in the DFS numbering of the dominator tree. So we sort the defs and
// uses into the DFS ordering, and then just use the renaming stack as per
// normal, pushing when we hit a def (which is a predicateinfo instruction),
// popping when we are out of the dfs scope for that def, and replacing any uses
// with top of stack if it exists.  In order to handle liveness without
// propagating liveness info, we don't actually insert the predicateinfo
// instruction def until we see a use that it would dominate.  Once we see such
// a use, we materialize the predicateinfo instruction in the right place and
// use it.
//
// TODO: Use this algorithm to perform fast single-variable renaming in
// promotememtoreg and memoryssa.

Please try to look into it once again.

@nikic
Copy link
Contributor

nikic commented Aug 20, 2025

I'm not sure I fully understand what you mean here. Is this about renaming uses so that the inserted phis get used?

The phis do need to be integrated with renaming, but treating them like a plain predicate is not quite right. If you do that, then you'll introduce a bitcast (formerly ssa.copy). But we should not be inserting a bitcast, things should directly use the phi nodes.

@Rajveer100
Copy link
Member Author

I think we're on the same page.

So, from a performance standpoint what do you think is better? If I understand correctly, if we don't use bitcast we would need to replace all uses for the given Op we are dealing with?

Whereas, if we just let the predicate be, it doesn't need any further handling.

@Rajveer100
Copy link
Member Author

  for (Value *Op : OpsToRename) {
    if (NewPHIs.count(Op)) {
      Value *PHI = PHIs[Op];
      SmallVector<Use *, 8> UsesToReplace;
      for (Use &U : Op->uses()) {
        if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
          if (I->getParent() == PHI->getParent()) {
            UsesToReplace.push_back(&U);
          }
        }
      }
      for (Use *U : UsesToReplace) {
        U->set(PHI);
      }
    }
  }

This is what I am talking about, this is quite inefficient as compared to what I have done even though we introduced a new PredicatePHI.

@Rajveer100 Rajveer100 requested a review from nikic August 22, 2025 08:04
@Rajveer100
Copy link
Member Author

@nikic
What's your thought process for above?

@Rajveer100
Copy link
Member Author

@nikic
Gentle ping.

@Rajveer100
Copy link
Member Author

  for (Value *Op : OpsToRename) {

    if (NewPHIs.count(Op)) {

      Value *PHI = PHIs[Op];

      SmallVector<Use *, 8> UsesToReplace;

      for (Use &U : Op->uses()) {

        if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {

          if (I->getParent() == PHI->getParent()) {

            UsesToReplace.push_back(&U);

          }

        }

      }

      for (Use *U : UsesToReplace) {

        U->set(PHI);

      }

    }

  }

This is what I am talking about, this is quite inefficient as compared to what I have done even though we introduced a new PredicatePHI.

@fhahn
Could you look into this.

Copy link
Contributor

@arsenm arsenm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Description still mentions ssa copy but that's not involved

@Rajveer100
Copy link
Member Author

Rajveer100 commented Oct 11, 2025

Description still mentions ssa copy but that's not involved

Yeah, it should be changed to bitcast. By the way, what do you think about the patch itself, removing PredicatePHI should be done here as suggested or something that can be a potential follow up?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

Missed Optimization: Constant Store Not Folded in Nested Conditional Control Flow in the form of (C2 ? (C1 ? C1 : X) : (C1 ? C1 : Y))

5 participants